This document will explore the distribution of MLGs across years as well as filtering strategies.
library(PramCurry)
## Loading required package: poppr
## Loading required package: adegenet
## Loading required package: ade4
## ==========================
## adegenet 1.4-2 is loaded
## ==========================
##
## - to start, type '?adegenet'
## - to browse adegenet website, type 'adegenetWeb()'
## - to post questions/comments: adegenet-forum@lists.r-forge.r-project.org
##
##
## This is poppr version 1.1.2.99.70. To get started, type package?poppr
library(reshape2)
library(ggplot2)
library(ape)
library(poppr)
library(adegenet)
options(stringsAsFactors = FALSE)
data(ramdat)
data(pop_data)
data(myPal)
devtools::source_gist(2877821)
## Sourcing https://gist.githubusercontent.com/baptiste/2877821/raw/d0e2fc28b88f912543b9d6a2a6bf799fd822524c/set_panel_size.r
## SHA-1 hash of file is 791923a0cf8a28d5d60479bdd8fb08808d11c467
## Loading required package: grid
sessionInfo()
## R version 3.1.2 (2014-10-31)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] ape_3.1-4.5 ggplot2_1.0.0 reshape2_1.4 PramCurry_1.0
## [5] poppr_1.1.2.99-70 adegenet_1.4-2 ade4_1.6-2
##
## loaded via a namespace (and not attached):
## [1] assertthat_0.1 bitops_1.0-6 boot_1.3-11
## [4] caTools_1.17.1 colorspace_1.2-4 devtools_1.6
## [7] digest_0.6.4 dplyr_0.2 evaluate_0.5.5
## [10] fastmatch_1.0-4 formatR_1.0 ggmap_2.3
## [13] gtable_0.1.2 htmltools_0.2.6 httpuv_1.3.0
## [16] httr_0.5 igraph_0.7.1 jsonlite_0.9.11
## [19] knitr_1.6 lattice_0.20-29 mapproj_1.2-2
## [22] maps_2.3-9 MASS_7.3-34 Matrix_1.1-4
## [25] munsell_0.4.2 nlme_3.1-117 parallel_3.1.2
## [28] pegas_0.6 permute_0.8-3 phangorn_1.99-7
## [31] plyr_1.8.1 png_0.1-7 proto_0.3-10
## [34] Rcpp_0.11.2 RCurl_1.95-4.3 RgoogleMaps_1.2.0.6
## [37] rjson_0.2.14 RJSONIO_1.3-0 rmarkdown_0.3.10
## [40] scales_0.2.4 shiny_0.10.1 stringr_0.6.2
## [43] tools_3.1.2 vegan_2.0-10 xtable_1.7-4
## [46] yaml_2.1.13
First thing to do is to look at the distribution of the MLGs per year.
ramdat@other$oldmlg <- mlg.vector(ramdat)
(unfiltered <- mlg.table(ramdat, total = TRUE))
## | 2001
## | 2002
## | 2003
## | 2004
## | 2005
## | 2006
## | 2010
## | 2011
## | 2012
## | 2013
## | 2014
## | Total
## MLG.1 MLG.2 MLG.3 MLG.4 MLG.5 MLG.6 MLG.7 MLG.8 MLG.9 MLG.10 MLG.11
## 2001 0 0 0 1 0 0 0 0 0 0 0
## 2002 0 0 1 1 0 0 0 0 0 0 0
## 2003 0 0 0 1 0 0 0 0 0 0 0
## 2004 0 0 0 0 0 0 0 0 0 0 0
## 2005 0 0 0 0 0 0 0 0 0 0 0
## 2006 0 0 0 0 0 0 0 0 0 0 0
## 2010 0 0 0 0 0 0 0 0 0 0 0
## 2011 0 0 0 0 0 0 0 0 0 0 0
## 2012 0 0 0 0 0 0 0 0 0 0 0
## 2013 1 1 0 2 3 1 1 0 1 1 1
## 2014 0 0 0 0 0 0 0 1 0 0 0
## Total 1 1 1 5 3 1 1 1 1 1 1
## MLG.12 MLG.13 MLG.14 MLG.15 MLG.16 MLG.17 MLG.18 MLG.19 MLG.20
## 2001 0 0 0 0 1 0 0 1 1
## 2002 0 0 0 0 0 2 0 0 0
## 2003 0 0 3 0 1 4 0 0 1
## 2004 0 0 0 0 0 1 0 0 0
## 2005 0 0 0 0 0 0 0 0 0
## 2006 0 0 0 0 0 0 0 0 0
## 2010 0 0 0 0 0 0 0 0 0
## 2011 0 0 0 0 0 0 0 0 0
## 2012 0 0 2 0 0 1 5 0 0
## 2013 5 1 33 3 1 9 3 0 1
## 2014 1 0 5 2 0 0 0 0 0
## Total 6 1 43 5 3 17 8 1 3
## MLG.21 MLG.22 MLG.23 MLG.24 MLG.25 MLG.26 MLG.27 MLG.28 MLG.29
## 2001 3 28 0 1 0 1 1 0 0
## 2002 0 23 2 0 0 0 0 3 2
## 2003 5 58 1 1 0 0 2 4 3
## 2004 0 16 0 0 0 0 0 4 0
## 2005 0 0 0 0 0 0 0 0 0
## 2006 0 0 0 0 0 0 0 0 0
## 2010 0 0 0 0 0 0 0 0 0
## 2011 0 1 0 0 0 0 0 0 0
## 2012 1 0 8 1 0 0 0 0 0
## 2013 2 21 12 4 1 0 1 8 5
## 2014 0 2 0 0 0 0 0 2 1
## Total 11 149 23 7 1 1 4 21 11
## MLG.30 MLG.31 MLG.32 MLG.33 MLG.34 MLG.35 MLG.36 MLG.37 MLG.38
## 2001 0 0 0 0 0 0 0 0 0
## 2002 0 0 0 0 0 0 0 0 0
## 2003 0 0 0 0 0 0 0 0 0
## 2004 0 0 0 0 0 0 0 0 0
## 2005 1 0 0 0 0 0 0 0 0
## 2006 0 0 0 0 0 0 0 0 0
## 2010 0 0 0 1 0 0 0 0 0
## 2011 0 0 5 0 0 0 0 0 0
## 2012 0 0 0 0 0 0 0 0 0
## 2013 1 1 0 0 0 1 2 1 3
## 2014 0 0 0 2 1 0 0 0 0
## Total 2 1 5 3 1 1 2 1 3
## MLG.39 MLG.40 MLG.41 MLG.42 MLG.43 MLG.44 MLG.45 MLG.46 MLG.47
## 2001 0 0 1 0 0 0 0 0 0
## 2002 0 0 0 0 0 0 0 0 0
## 2003 0 0 0 0 0 0 0 0 0
## 2004 0 0 0 0 0 0 0 0 0
## 2005 0 0 0 0 0 0 0 0 0
## 2006 0 0 0 0 0 0 0 1 0
## 2010 0 0 0 0 0 0 0 0 0
## 2011 0 0 0 0 0 0 0 0 0
## 2012 0 0 0 0 0 0 0 0 0
## 2013 0 1 1 1 1 1 0 3 1
## 2014 1 0 0 0 0 0 2 1 0
## Total 1 1 2 1 1 1 2 5 1
## MLG.48 MLG.49 MLG.50 MLG.51 MLG.52 MLG.53 MLG.54 MLG.55 MLG.56
## 2001 0 0 0 0 0 0 0 0 0
## 2002 0 0 0 0 0 0 0 0 0
## 2003 0 0 0 2 0 0 0 0 0
## 2004 0 0 0 0 2 0 0 0 0
## 2005 0 0 0 0 0 0 0 0 0
## 2006 0 0 0 0 0 0 0 0 0
## 2010 0 0 0 0 0 0 0 0 0
## 2011 0 0 0 0 1 0 0 1 0
## 2012 0 0 0 0 0 2 0 0 0
## 2013 3 1 0 0 6 13 12 0 1
## 2014 0 0 1 0 4 2 1 0 0
## Total 3 1 1 2 13 17 13 1 1
## MLG.57 MLG.58 MLG.59 MLG.60 MLG.61 MLG.62 MLG.63 MLG.64 MLG.65
## 2001 0 0 0 0 0 0 0 0 0
## 2002 0 0 0 0 0 0 0 0 0
## 2003 0 0 0 1 0 0 0 1 1
## 2004 0 0 0 0 0 0 0 0 0
## 2005 0 0 0 0 0 0 0 0 0
## 2006 0 0 0 0 0 0 0 0 0
## 2010 0 0 0 0 0 0 0 0 0
## 2011 0 1 59 0 0 0 0 0 0
## 2012 0 0 0 0 0 0 0 0 0
## 2013 1 0 0 0 1 1 0 0 0
## 2014 0 0 0 0 0 0 1 0 0
## Total 1 1 59 1 1 1 1 1 1
## MLG.66 MLG.67 MLG.68 MLG.69 MLG.70
## 2001 0 0 0 0 0
## 2002 0 0 1 1 0
## 2003 0 1 10 1 1
## 2004 1 1 8 0 2
## 2005 0 0 1 0 0
## 2006 0 0 0 0 0
## 2010 0 0 0 0 0
## 2011 0 0 0 0 0
## 2012 0 0 0 0 0
## 2013 0 0 0 0 0
## 2014 0 0 0 0 0
## Total 1 2 20 2 3
totplot <- last_plot()
options(digits = 10)
(newReps <- other(ramdat)$REPLEN)
## PrMS6A1 Pr9C3A1 PrMS39A1 PrMS45A1 PrMS43A1
## 3 2 2 4 4
newReps[3] <- 4
(newReps <- fix_replen(ramdat, newReps))
## PrMS6A1 Pr9C3A1 PrMS39A1 PrMS45A1 PrMS43A1
## 3.00000 2.00000 4.00001 4.00000 4.00000
mlg.heatmap(unfiltered)
mlg.barplot(unfiltered, pal = myPal)
uf <- unfiltered[-nrow(unfiltered), ]
mlg.barplot(uf[rowSums(uf) > 10, ], total = FALSE, pal = myPal)
setpop(ramdat) <- ~ZONE1
zones <- mlg.table(ramdat, bar = FALSE)
# mlg.heatmap(zones)
mlg.barplot(zones, pal = myPal)
setpop(ramdat) <- ~Pop
(bars <- ggplot(totplot$data, aes(x = MLG, y = count, fill = MLG)) +
geom_bar(stat = "identity") +
theme_classic() +
scale_y_continuous(expand = c(0, 0), limits = c(0, 160)) +
scale_fill_manual(values = myPal) +
guides(fill = guide_legend(ncol = 2, title = NULL, )) +
geom_text(aes(label = count), size = 2.5, hjust = 0, font = "bold") +
theme(axis.text.y = element_blank(), axis.ticks.y = element_blank(),
#element_text(angle = -45, vjust = 1, hjust = 0, size = 5),
#legend.position = c(1, 0), legend.direction = "vertical",
legend.position = "none",
legend.text = element_text(size = rel(0.65)),
legend.justification = c(1, 0),
legend.background = element_rect(color = "black"),
legend.margin = grid::unit(0, "pt"),
legend.key.size = grid::unit(15, "pt"),
text = element_text(family = "Helvetica"),
axis.title.y = element_blank()) +
scale_x_discrete(limits = rev(totplot$data$MLG)) +
coord_flip())
# ggsave("mlg_dist.svg", plot = set_panel_size(bars,
# width = unit(3, "in"),
# height = unit(12, "in")),
# width = 6, height = 15)
# ggsave("mlg_dist_ppt.svg",
# plot = set_panel_size(bars, width = unit(30*1.2, "mm"),
# height = unit(225*1.2, "mm")),
# width = 88,
# height = 250,
# units = "mm",
# family = "Helvetica",
# scale = 1.2,
# pointsize = 12)
Get a sense of when these arose:
ramdat.cc <- clonecorrect(ramdat, hier = NA)
rangers <- mlg.crosspop(ramdat, mlgsub = unique(ramdat@mlg), df = TRUE, quiet = TRUE)
names(rangers)[2] <- "Year"
rangers$MLG <- factor(rangers$MLG, levels = colnames(unfiltered))
(ranges <- ggplot(rangers, aes(x = Year, y = MLG, group = MLG)) +
geom_line(aes(color = MLG), size = 1, linetype = 1) +
geom_point(aes(color = MLG), size = 5, pch = 21, fill = "white") +
geom_text(aes(label = Count), size = 2.5) +
scale_color_manual(values = myPal) +
guides(color = guide_legend(ncol = 3)) +
ylab("Multilocus Genotype") +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
text = element_text(family = "Helvetica"),
legend.position = "none",
axis.line = element_line(colour = "black")) +
# theme(panel.grid.major.y = element_line(size = 0.5, color = "grey")) +
# theme(panel.grid.major.x = element_line(size = 1, color = "grey")) +
scale_y_discrete(limits = rev(totplot$data$MLG)))
# ggsave("mlg_range.svg", plot = set_panel_size(bars,
# width = unit(5, "in"),
# height = unit(12, "in")),
# width = 6, height = 15)
# ggsave("mlg_range_ppt.svg",
# plot = set_panel_size(ranges, width = unit(44*1.2, "mm"),
# height = unit(225*1.2, "mm")),
# width = 88, height = 250, units = "mm", family = "Helvetica",
# pointsize = 12, scale = 1.2)
pamat <- unfiltered > 0
pamat <- pamat[-nrow(pamat), ]
timetable <- MLG_range(pamat)
origin.df <- data.frame(timetable[paste("MLG", ramdat.cc@mlg, sep = "."), ])
origin.df$MLG <- rownames(origin.df)
counts <- unfiltered["Total", origin.df$MLG]
origin.df$newMLG <- paste(origin.df$first, pad_zeroes(1:nrow(origin.df)), sep = ".")
Now that we have this information, obtaining a bootstrapped dendrogram would be good. Bruvo’s distance would be a good choice since this is a clonal epidemic and we would expect mutations in a stepwise fashion.
set.seed(5555)
system.time(njtree <- bruvo.boot(ramdat, replen = newReps,#other(ramdat)$REPLEN,
sample = 100, quiet = TRUE, showtree = FALSE,
tree = "nj")
)
## Warning: Some branch lengths of the tree are negative. Normalizing
## branches according to Kuhner and Felsenstein (1994)
## user system elapsed
## 227.029 2.236 233.699
njtree$tip.label <- paste("MLG", ramdat@mlg, sep = ".")
njtree <- ladderize(njtree)
plot.phylo(njtree, cex = 0.8, font = 2, adj = 0, xpd = TRUE,
label.offset = 1/800, tip.col = myPal[ramdat@mlg])
nodelabels(ifelse(njtree$node.label > 50, njtree$node.label, NA),
adj = c(1.3, -0.5), frame = "n",
cex = 0.8, font = 3, xpd = TRUE)
add.scale.bar(lwd = 5)
set.seed(5001)
system.time(njtree.cc <- bruvo.boot(ramdat.cc, replen = newReps,#other(ramdat)$REPLEN,
sample = 1000, quiet = TRUE, showtree = FALSE,
tree = "nj")
)
## user system elapsed
## 56.917 2.678 62.375
njtree.cc$tip.label <- paste(origin.df$newMLG, origin.df$MLG, sep = "_")
names(myPal) <- njtree.cc$tip.label[order(mlgFromString(origin.df$MLG))]
yearPal <- deepseasun(nrow(pamat))
names(yearPal) <- rownames(pamat)
njtree.cc <- ladderize(njtree.cc)
par(mfrow = c(1, 2))
plot.phylo(njtree.cc, cex = 0.8, font = 2, xpd = TRUE, adj = 0,
tip.col = yearPal[substr(njtree.cc$tip.label, 1, 4)])
nodelabels(ifelse(njtree.cc$node.label > 50, njtree.cc$node.label, NA),
adj = c(1.3, -0.5),
frame = "n",
cex = 0.8, font = 3, xpd = TRUE)
add.scale.bar(lwd = 5)
legend("bottomright", fill = yearPal, legend = names(yearPal), bty = "n",
title = "Year first\nisolated",
cex = 0.75,)
plot.phylo(njtree.cc, cex = 0.8, font = 2, xpd = TRUE, adj = 0,
tip.col = myPal[njtree.cc$tip.label])
nodelabels(ifelse(njtree.cc$node.label > 50, njtree.cc$node.label, NA),
adj = c(1.3, -0.5),
frame = "n",
cex = 0.8, font = 3, xpd = TRUE)
add.scale.bar(lwd = 5)
par(mfrow = c(1, 1))
njtree.cc$tip.label <- origin.df$MLG
names(myPal) <- origin.df$MLG[order(mlgFromString(origin.df$MLG))]
assoc <- matrix(c(njtree$tip.label, njtree$tip.label), ncol = 2)
cophyloplot(njtree, njtree.cc, assoc, use.edge.length = FALSE, space = 1000,
show.tip.label = FALSE,
col = myPal[assoc[, 1]], lwd = 3, gap = 1)